Data Import

Import dataset

# Import Ziesel dataset
dat <- read.csv("Zeisel_preprocessed.csv", row.names = 1)
cell_type <- read.table("Zeisel_cell_info.txt", sep = "\t", header = 1)

# Get the labels for each cell
cluster_labels <- as.numeric(as.factor(cell_type$level1class))

Divide the dataset by cell type

cell_labels <- unique(cell_type$level1class)
subset_data <- list()
load("meaningful_indices.RData")

for (i in 1:length(cell_labels)){
  label <- cell_labels[i]
  extracted <- dat[which(cell_type$level1class == label), indices]
  hclustered <- hclust(dist(t(extracted)))$order
  reordered <- extracted[, hclustered]
  subset_data[[i]] <- reordered
}

col <- ncol(subset_data[[1]])
cell_num <- length(cell_labels)

Function to draw a heatmap

library(ggplot2)

store_heatmap <- function(heatmap_matrix, method, celltype, abs = F){
  if (abs){
    heatmap_title <- paste0("Abs_Heatmap of ", method,
                            " Correlation\n(", celltype, ")")  
  }
  else{
    heatmap_title <- paste0("Heatmap of ", method,
                            " Correlation\n(", celltype, ")")
  }
  
  heatmap <- ggplot(data = heatmap_matrix, 
                    aes(x = Var2, y = Var1, fill = value)) + 
    geom_tile() +
    scale_fill_viridis_b(option = "D", direction = -1,
                         breaks = round(as.numeric(
      quantile(heatmap_matrix[,3],
               probs = seq(0, 1,length.out = 6))), 1)) +
    theme(axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          panel.background = element_rect(fill = "transparent")) +
    labs(title = heatmap_title)
  return (heatmap)
  }

1. Pearson Correlation

library(reshape2)

pearson_median <- c()
pearson_array <- array(NA, dim = c(col, col, cell_num))

pearson_max <- c()
pearson_max_pair1 <- c()
pearson_max_pair2 <- c()

pearson_min <- c()
pearson_min_pair1 <- c()
pearson_min_pair2 <- c()

pearson_heatmap <- list()

for (i in 1:length(cell_labels)){
  subset_pearson <- cor(subset_data[[i]], method = "pearson")
  
  pearson_median <- c(pearson_median, median(subset_pearson))
  
  subset_pearson[upper.tri(subset_pearson, diag = T)] <- NA
  pearson_array[, , i] <- subset_pearson
  
  pearson_vec <- sort(abs(subset_pearson), decreasing = T)
  
  max_idx <- which(abs(subset_pearson) == pearson_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  pearson_max_pair1 <- c(pearson_max_pair1, idx1)
  pearson_max_pair2 <- c(pearson_max_pair2, idx2)
  
  pearson_max <- c(pearson_max, subset_pearson[idx1, idx2])
  
  min_idx <- which(abs(subset_pearson) == rev(pearson_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  pearson_min_pair1 <- c(pearson_min_pair1, idx1)
  pearson_min_pair2 <- c(pearson_min_pair2, idx2)
  
  pearson_min <- c(pearson_min, subset_pearson[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_pearson, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  pearson_heatmap[[i]] <- store_heatmap(melted_matrix, "Pearson",
                                        cell_labels[i])
}

The pairs with highest pearson coefficient by cell type. (Close to perfect linear relationship)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- pearson_max_pair1[i]; idx2 <- pearson_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(pearson_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest pearson coefficient by cell type. (Close to imperfect linear dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- pearson_min_pair1[i]; idx2 <- pearson_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(pearson_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of Pearson coefficient by cell type

library(gridExtra)
marrangeGrob(pearson_heatmap, nrow = 3, ncol = 3)

2. Spearman Correlation (Spearman’s rho)

spearman_median <- c()
spearman_array <- array(NA, dim = c(col, col, cell_num))

spearman_max <- c()
spearman_max_pair1 <- c()
spearman_max_pair2 <- c()

spearman_min <- c()
spearman_min_pair1 <- c()
spearman_min_pair2 <- c()

spearman_heatmap <- list()

for (i in 1:length(cell_labels)){
  subset_spearman <- cor(subset_data[[i]], method = "spearman")
  
  spearman_median <- c(spearman_median, median(subset_spearman))
  
  subset_spearman[upper.tri(subset_spearman, diag = T)] <- NA
  spearman_array[ , , i] <- subset_spearman
  
  spearman_vec <- sort(abs(subset_spearman), decreasing = T)
  
  max_idx <- which(abs(subset_spearman) == spearman_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  spearman_max_pair1 <- c(spearman_max_pair1, idx1)
  spearman_max_pair2 <- c(spearman_max_pair2, idx2)
  
  spearman_max <- c(spearman_max, subset_spearman[idx1, idx2])
  
  min_idx <- which(abs(subset_spearman) == rev(spearman_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  spearman_min_pair1 <- c(spearman_min_pair1, idx1)
  spearman_min_pair2 <- c(spearman_min_pair2, idx2)
  
  spearman_min <- c(spearman_min, subset_spearman[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_spearman, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  spearman_heatmap[[i]] <- store_heatmap(melted_matrix, "Spearman",
                                        cell_labels[i])
}

The pairs with highest spearman’s rho by cell type. (Close to perfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- spearman_max_pair1[i]; idx2 <- spearman_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(spearman_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest spearman’s rho by cell type. (Close to imperfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- spearman_min_pair1[i]; idx2 <- spearman_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(spearman_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of Spearman’s rho by cell type

marrangeGrob(spearman_heatmap, nrow = 3, ncol = 3)

3. Kendall’s tau

library(pcaPP)

kendall_median <- c()
kendall_array <- array(NA, dim = c(col, col, cell_num))

kendall_max <- c()
kendall_max_pair1 <- c()
kendall_max_pair2 <- c()

kendall_min <- c()
kendall_min_pair1 <- c()
kendall_min_pair2 <- c()

kendall_heatmap <- list()

for (i in 1:length(cell_labels)){
  subset_kendall <- cor.fk(subset_data[[i]])
  
  kendall_median <- c(kendall_median, median(subset_kendall))
  
  subset_kendall[upper.tri(subset_kendall, diag = T)] <- NA
  kendall_array[, , i] <- subset_kendall
  
  kendall_vec <- sort(abs(subset_kendall), decreasing = T)
  
  max_idx <- which(abs(subset_kendall) == kendall_vec[i], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  kendall_max_pair1 <- c(kendall_max_pair1, idx1)
  kendall_max_pair2 <- c(kendall_max_pair2, idx2)
  
  kendall_max <- c(kendall_max, subset_kendall[idx1, idx2])
  
  min_idx <- which(abs(subset_kendall) == rev(kendall_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  kendall_min_pair1 <- c(kendall_min_pair1, idx1)
  kendall_min_pair2 <- c(kendall_min_pair2, idx2)
  
  kendall_min <- c(kendall_min, subset_kendall[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_kendall, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  kendall_heatmap[[i]] <- store_heatmap(melted_matrix, "Kendall",
                                        cell_labels[i])
}

The pairs with highest kendall’s tau by cell type. (Close to perfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- kendall_max_pair1[i]; idx2 <- kendall_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(kendall_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest kendall’s tau by cell type. (Close to imperfect monotonically dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- kendall_min_pair1[i]; idx2 <- kendall_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(kendall_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of Kendall’s tau by cell type

marrangeGrob(kendall_heatmap, nrow = 3, ncol = 3)

4. Distance Correlation

library(energy)

dist_median <- c()
dist_array <- array(NA, dim = c(col, col, cell_num))

dist_max <- c()
dist_max_pair1 <- c()
dist_max_pair2 <- c()

dist_min <- c()
dist_min_pair1 <- c()
dist_min_pair2 <- c()

dist_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_dist <- matrix(nrow = ncol(sub_dat),
                         ncol = ncol(sub_dat))

  for (j in 2:ncol(sub_dat)){
    for (k in 1:(j-1)){
      subset_dist[j,k] <- dcor(as.numeric(sub_dat[, j]),
                                as.numeric(sub_dat[, k]))
    }
  }
  dist_array[ , , i] <- subset_dist
  
  dist_median <- c(dist_median, median(subset_dist, na.rm = T))

  dist_vec <- sort(abs(subset_dist), decreasing = T)
  
  max_idx <- which(abs(subset_dist) == dist_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  dist_max_pair1 <- c(dist_max_pair1, idx1)
  dist_max_pair2 <- c(dist_max_pair2, idx2)
  
  dist_max <- c(dist_max, subset_dist[idx1, idx2])
  
  min_idx <- which(abs(subset_dist) == rev(dist_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  dist_min_pair1 <- c(dist_min_pair1, idx1)
  dist_min_pair2 <- c(dist_min_pair2, idx2)
  
  dist_min <- c(dist_min, subset_dist[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_dist, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  dist_heatmap[[i]] <- store_heatmap(melted_matrix, "Dist.Cor",
                                        cell_labels[i])
}

The pairs with highest distance correlation by cell type. (Close to perfect linear dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- dist_max_pair1[i]; idx2 <- dist_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(dist_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest distance correlation by cell type. (Close to imperfect linear dependence)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- dist_min_pair1[i]; idx2 <- dist_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(dist_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of distance correlation by cell type

marrangeGrob(dist_heatmap, nrow = 3, ncol = 3)

5. Hoeffding’s D measure

library(Hmisc)

hoeff_median <- c()
hoeff_array <- array(NA, dim = c(col, col, cell_num))

hoeff_max <- c()
hoeff_max_pair1 <- c()
hoeff_max_pair2 <- c()

hoeff_min <- c()
hoeff_min_pair1 <- c()
hoeff_min_pair2 <- c()

hoeff_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  
  subset_hoeff <- hoeffd(x = as.matrix(sub_dat))$D
  
  hoeff_median <- c(hoeff_median, median(subset_hoeff, na.rm = T))
  
  subset_hoeff[upper.tri(subset_hoeff, diag = T)] <- NA
  hoeff_array[, , i] <- subset_hoeff
  
  hoeff_vec <- sort(abs(subset_hoeff), decreasing = T)
  
  max_idx <- which(abs(subset_hoeff) == hoeff_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  hoeff_max_pair1 <- c(hoeff_max_pair1, idx1)
  hoeff_max_pair2 <- c(hoeff_max_pair2, idx2)
  
  hoeff_max <- c(hoeff_max, subset_hoeff[idx1, idx2])
  
  min_idx <- which(abs(subset_hoeff) == rev(hoeff_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  hoeff_min_pair1 <- c(hoeff_min_pair1, idx1)
  hoeff_min_pair2 <- c(hoeff_min_pair2, idx2)
  
  hoeff_min <- c(hoeff_min, subset_hoeff[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_hoeff, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  hoeff_heatmap[[i]] <- store_heatmap(melted_matrix, "Hoeffding's D",
                                        cell_labels[i])
}

The pairs with highest hoeffiding’s D by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- hoeff_max_pair1[i]; idx2 <- hoeff_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(hoeff_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest hoeffiding’s D by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- hoeff_min_pair1[i]; idx2 <- hoeff_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(hoeff_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of Hoeffiding’s D by cell type

marrangeGrob(hoeff_heatmap, nrow = 3, ncol = 3)

6. Mutual information (MI)

library(entropy)

MI_median <- c()
MI_array <- array(NA, dim = c(col, col, cell_num))

MI_max <- c()
MI_max_pair1 <- c()
MI_max_pair2 <- c()

MI_min <- c()
MI_min_pair1 <- c()
MI_min_pair2 <- c()

MI_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_MI <- matrix(nrow = ncol(sub_dat),
                      ncol = ncol(sub_dat))

  for (j in 2:ncol(sub_dat)){
    for (k in 1:(j-1)){
    y2d <- discretize2d(as.matrix(sub_dat[, j]),
                                   as.matrix(sub_dat[, k]),
                                   numBins1 = 20,
                                   numBins2 = 20)
    subset_MI[j,k] <- as.numeric(mi.empirical(y2d))
    }
  }
  MI_array[, , i] <- subset_MI
  
  MI_median <- c(MI_median, median(subset_MI, na.rm = T))
  
  MI_vec <- sort(abs(subset_MI), decreasing = T)
  
  max_idx <- which(abs(subset_MI) == MI_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  MI_max_pair1 <- c(MI_max_pair1, idx1)
  MI_max_pair2 <- c(MI_max_pair2, idx2)
  
  MI_max <- c(MI_max, subset_MI[idx1, idx2])
  
  min_idx <- which(abs(subset_MI) == rev(MI_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  MI_min_pair1 <- c(MI_min_pair1, idx1)
  MI_min_pair2 <- c(MI_min_pair2, idx2)
  
  MI_min <- c(MI_min, subset_MI[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_MI, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  MI_heatmap[[i]] <- store_heatmap(melted_matrix, "MI",
                                        cell_labels[i])
}

The pairs with highest MI by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MI_max_pair1[i]; idx2 <- MI_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(MI_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest MI by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MI_min_pair1[i]; idx2 <- MI_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(MI_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of MI by cell type

marrangeGrob(MI_heatmap, nrow = 3, ncol = 3)

7. Maximum Information Coefficient (MIC)

library(minerva)

MIC_median <- c()
MIC_array <- array(NA, dim = c(col, col, cell_num))

MIC_max <- c()
MIC_max_pair1 <- c()
MIC_max_pair2 <- c()

MIC_min <- c()
MIC_min_pair1 <- c()
MIC_min_pair2 <- c()

MIC_heatmap <- c()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  cor_MIC <- mine(sub_dat)

  subset_MIC <- mine(sub_dat)$MIC
  
  MIC_median <- c(MIC_median, median(subset_MIC, na.rm = T))
  
  subset_MIC[upper.tri(subset_MIC, diag = T)] <- NA
  MIC_array[, , i] <- subset_MIC
  
  MIC_vec <- sort(abs(subset_MIC), decreasing = T)
  
  max_idx <- which(abs(subset_MIC) == MIC_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  MIC_max_pair1 <- c(MIC_max_pair1, idx1)
  MIC_max_pair2 <- c(MIC_max_pair2, idx2)
  
  MIC_max <- c(MIC_max, subset_MIC[idx1, idx2])
  
  min_idx <- which(abs(subset_MIC) == rev(MIC_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  MIC_min_pair1 <- c(MIC_min_pair1, idx1)
  MIC_min_pair2 <- c(MIC_min_pair2, idx2)
  
  MIC_min <- c(MIC_min, subset_MIC[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_MIC, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  MIC_heatmap[[i]] <- store_heatmap(melted_matrix, "MIC",
                                        cell_labels[i])
}

The pairs with highest MIC by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MIC_max_pair1[i]; idx2 <- MIC_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(MIC_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest MIC by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- MIC_min_pair1[i]; idx2 <- MIC_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(MIC_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of MIC by cell type

marrangeGrob(MIC_heatmap, nrow = 3, ncol = 3)

8. Chatterjee’s method (XI)

library(XICOR)

XI_median <- c()
XI_array <- array(NA, dim = c(col, col, cell_num))

XI_max <- c()
XI_max_pair1 <- c()
XI_max_pair2 <- c()

XI_min <- c()
XI_min_pair1 <- c()
XI_min_pair2 <- c()

XI_heatmap <- list()

for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  subset_XI <- matrix(nrow = ncol(sub_dat),
                      ncol = ncol(sub_dat))

  for (j in 2:ncol(sub_dat)){
    for (k in 1:(j-1)){
      subset_XI[j,k] <- calculateXI(as.numeric(sub_dat[, j]),
                                    as.numeric(sub_dat[, k]))
    }
  }
  XI_array[, , i] <- subset_XI
  
  XI_median <- c(XI_median, median(subset_XI, na.rm = T))
  
  XI_vec <- sort(abs(subset_XI), decreasing = T)
  
  max_idx <- which(abs(subset_XI) == XI_vec[1], arr.ind = T)
  idx1 <- max_idx[1]; idx2 <- max_idx[1,2]
  
  XI_max_pair1 <- c(XI_max_pair1, idx1)
  XI_max_pair2 <- c(XI_max_pair2, idx2)
  
  XI_max <- c(XI_max, subset_XI[idx1, idx2])
  
  min_idx <- which(abs(subset_XI) == rev(XI_vec)[1], arr.ind = T)
  idx1 <- min_idx[1]; idx2 <- min_idx[1,2]
  
  XI_min_pair1 <- c(XI_min_pair1, idx1)
  XI_min_pair2 <- c(XI_min_pair2, idx2)
  
  XI_min <- c(XI_min, subset_XI[idx1, idx2])
  
  # Store a heatmap
  melted_matrix <- melt(subset_XI, na.rm = T)
  
  # ggplot to draw a heatmap with the viridis palette
  XI_heatmap[[i]] <- store_heatmap(melted_matrix, "XI",
                                        cell_labels[i])
}

The pairs with highest XI by cell type. (Highly correlated)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- XI_max_pair1[i]; idx2 <- XI_max_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(XI_max[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

The pairs with lowest XI by cell type. (Seemingly independent)

par(mfrow = c(2,4))
for (i in 1:length(cell_labels)){
  sub_dat <- subset_data[[i]]
  idx1 <- XI_min_pair1[i]; idx2 <- XI_min_pair2[i]
  plot(sub_dat[,idx1], sub_dat[,idx2], asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(XI_min[i], 3),
                    "\nCell_type: ", cell_labels[i]))
}

Heatmap of XI by cell type

marrangeGrob(XI_heatmap, nrow = 3, ncol = 3)

Median of correlation values by cell type and different measures.

median_df <- rbind(pearson_median, spearman_median,
                   kendall_median, dist_median,
                   hoeff_median, MI_median,
                   MIC_median, XI_median)
rownames(median_df) <- c("Pearson", "Spearman", "Kendall",
                         "Dist_Cor", "Hoeff_Dist", "Mutual_Info",
                         "MIC", "XI")
colnames(median_df) <- cell_labels

knitr::kable(median_df)
interneurons pyramidal SS pyramidal CA1 oligodendrocytes microglia endothelial-mural astrocytes_ependymal
Pearson 0.0509848 0.0656105 0.0986192 0.0543692 0.0794495 0.0642126 0.0809745
Spearman 0.0706406 0.0944663 0.1462104 0.0657028 0.0912916 0.0757885 0.0887166
Kendall 0.0478463 0.0639916 0.0985284 0.0447184 0.0616453 0.0513912 0.0606983
Dist_Cor 0.2809993 0.2648030 0.2530265 0.2560694 0.2838688 0.2674959 0.2928014
Hoeff_Dist 0.0216043 0.0206498 0.0192409 0.0164181 0.0150793 0.0175061 0.0194390
Mutual_Info 0.4555029 0.3458245 0.1830625 0.2110773 0.8758591 0.4925920 0.5304974
MIC 0.2573247 0.2347593 0.1873319 0.1969538 0.2853231 0.2551104 0.2613845
XI 0.0740912 0.0679460 0.0588543 0.0611467 0.0587316 0.0604628 0.0794619